**************************************/
/******************************************************************************/
/* Analytical Dataset Construction and Regression for Spillover Tables 		  */
/* Input Data:  QWI data Q4 data (no need to merge with SOD)                  */
/*              Demographic Data                                              */
/*              County Small bank shares (2000)                               */
/*                                                                            */
/******************************************************************************/
/******************************************************************************/
local Data "CountyYearFirmSizes2digit_RFS"

noi disp "Obtaining county industrial data..."
noi disp "Verifying that `Data'.dta does not already exist..."
* This returns an error if the file does not exist.
capture confirm file `Data'.dta

quietly{


/******************************************************************************/
/******************************************************************************/
/* Loading Data                                                               */
/******************************************************************************/
/******************************************************************************/

* If an error is returned, then the commands below are run.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Loading US_county_firmsize_q2.csv..."
	noi etime
	insheet using "$SourceData\US_county_firmsize.csv", clear
	noi disp "Finished loading."
	noi etime
	drop race ethnicity education firmage ownercode sex agegrp quarter semp

	
	*Use total county by firm size less financial by firm size
	*Industry=0 indicates across all industries. geo_level!="S" kicks out state aggregates
	*geography is county
	keep if industry!="0" & geo_level!="S"

	* Verify that there are no duplicates
	isid geography year firmsize industry
	
	gen emp_size = emp
	label var emp "Total Employment"

	keep geography industry year firmsize emp_size

	/* Combining Lowest Three Employment Categories into one */
	gen Firm123 = emp_size if firmsize==1 | firmsize==2 | firmsize==3
	* Finds total of all firms of size 1, 2, and 3
	bysort year geography industry: egen TotalFirm123 = sum(Firm123)
	drop Firm123 
	* Replace observation from firm size 1 with total of firms 1, 2, and 3
	replace emp_size = TotalFirm123 if firmsize==1
	drop TotalFirm123 
	* Keep only two sizes: Firms 1,2,3 and Firms 5. Keep total employment for shares.
	keep if firmsize==1 | firmsize==5 | firmsize==0

	* Drop if emp_size is missing
	* Note: These missings are not coded as zeros
	drop if missing(emp_size)

	* Keep only if the first year observed for each county is 2000.
	bysort geography: egen minyear = min(year)
	keep if minyear==2000
	drop minyear
	* Keep only if the last year observed for each county is 2017.
	bysort geography: egen maxyear = max(year)
	keep if maxyear==2017
	drop maxyear
	* Note that there is a large firm and small firm observation for each county-year).

	noi disp "Reshaping data where i(geography year) and j(firmsize)."
	reshape wide emp, i(geography industry year) j(firmsize)
	
	

*Require Observations of Large and Small Firm sizes for each county-year
noi drop if missing(emp_size5) | missing(emp_size1)
	
*County shares by firmsize
egen tot_emp = rowtotal(emp_size1 emp_size5)
egen geoind = group(geography industry)

bysort geography year: egen county_emp0 = sum(emp_size0)
bysort geography year: egen county_emp1 = sum(emp_size1)
bysort geography year: egen county_emp5 = sum(emp_size5)

gen county_noti_emp0 = county_emp0 - emp_size0
gen county_noti_emp1 = county_emp1 - emp_size1
gen county_noti_emp5 = county_emp5 - emp_size5

xtset geoind year
	
*Specify period over which shocks occur
foreach span in 1 3 5 15 {
	gen gLargeFirm`span' 		= log(emp_size5+1) - log(L`span'.emp_size5+1) if emp_size5!=0 & L`span'.emp_size5!=.
	gen gSmallFirm`span' 		= log(emp_size1+1) - log(L`span'.emp_size1+1) if emp_size1!=0 & L`span'.emp_size1!=.
	gen gLargeFirmnoti`span' 	= (log(county_noti_emp5+1) - log(L`span'.county_noti_emp5+1))*county_noti_emp5/county_emp0 if emp_size5!=0 & L`span'.emp_size5!=.
	gen gSmallFirmnoti`span' 	= (log(county_noti_emp1+1) - log(L`span'.county_noti_emp1+1))*county_noti_emp1/county_emp0 if emp_size1!=0 & L`span'.emp_size1!=.
	gen gLargeFirmCounty`span'	= log(county_emp5) - log(L`span'.county_emp5+1)
	gen gSmallFirmCounty`span'	= log(county_emp1) - log(L`span'.county_emp1+1)
}

	noi disp "Saving `Data'.dta"
	compress
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "



/***********************************************************/
/* Obtain national employment growth by industry-firm size */
/***********************************************************/
local Data "CountyIndustryEmp_2digit_RFS"

noi disp "Obtaining county industrial data..."
noi disp "Verifying that `Data'.dta does not already exist..."
* This returns an error if the file does not exist.
capture confirm file `Data'.dta

* If an error is returned, then the commands below are run.
if _rc == 601 {
noi disp "Data not found.  Building data..."
noi disp "Loading US_county_firmsize.csv..."
insheet using "$SourceData\US_county_firmsize.csv", clear
noi disp "Finished loading data"
noi disp "$LocalData"

*We only need total employees by industry, not by-size
	*firmsize = 0 is aggregate employment by county-industry
	keep if firmsize==0
	drop firmsize

*Keeping Relevant (not 52, 92, aggregate, state) County-Industries for Analysis
keep if industry!="52" & industry!="92" & geo_level!="S" & industry!="0"

noi disp "Keeping only county-industy pairs that have observations in both 2000."

*Keeping only data that exist in 2000
keep if year==2000

*Note that in removing industry 52 (and 92) we cannot rely on county aggregates for any employment numbers across industries
*Summing up county-industry employment
*Note: we are setting to "0" the empties by necessity. For smaller counties, this is likely a bigger issue for computing industry shares.
* Generate county-industry employment (c_i_emp)
noi gen c_i_emp = emp
noi replace c_i_emp = 0 if missing(c_i_emp)
label var c_i_emp "County-Industry Employment"

* Generate total county employment (c_emp)
bysort geography: egen c_emp = sum(c_i_emp)
label var c_i_emp "Total County Employment"
* Drop if there is no employment in the county
noi drop if c_emp <= 0
	
* Generating County-Industy Shares in 2000
gen c_i_share_2000 = c_i_emp/c_emp

* Check to insure every county's shares sum to 1.
bysort geography: egen total = sum(c_i_share_2000)
* Due to rounding, some are negligibly less than 1 (0.9999999)
assert total > 0.99
drop total /* Not needed once check is complete */

keep geography industry c_i_share_2000 c_emp
		
*Merging In County Size Shares
* Join by geography
sort geography industry
* Combines datasets horizontally but forming all pairwise combinations within county (geography)
joinby geography industry using "$LocalData\CountyYearFirmSizes2digit_RFS.dta"
		

	sort geograph year
	merge geography year using "$LocalData\Demographic.dta"
	drop _merge
	merge m:1 geography year using "$LocalData\SmallDepShare.dta"
	
	sort geography year
	*Checking unique geography years. 
	by geography year: gen nobs = _N
	su nobs
	drop nobs
	
	*Initial demographic data 
	keep if year>=2000
	*Restrict to observations with first observations in 2000. Lose 58 of 57,827 county-years.
	bysort geography: egen minyear = min(year)
	keep if minyear==2000
	drop minyear
	by geography: gen emp2000 = c_emp[1]
	by geography: gen lnincome2000 = ln(income[1])
	by geography: gen income2000 = income[1]	
	by geography: gen small_dep_share2000 = small_dep_share[1]

	egen empsize_total = rowtotal(emp_size*)
	gen c_sizeshare1 = emp_size1/empsize_total
	by geography: gen sm_size_share2000 = c_sizeshare1[3]
	
	gen pop2000 = exp(ln_pop_2000)

	gen lnemp2000 = ln(emp2000)
	
	*Keeping only counties observed in 2000
	drop if missing(emp2000)
	
	*Defining level variables as 1000s, where appropriate
	replace pop2000 = pop2000/1000
	replace income2000 = income2000/1000
	gen emp_2000 = emp2000/1000

	drop _merge

	*Saving Analytical Dataset	
	compress
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'_`CurrentDate'.dta", replace

}
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "
}
